## A Dynamic model of endogenous fishing duration
## Simulation main code
## #
## # So, we need three daily (within-trip) behavioral models to explain the point.
## # 1. Homogeneous, no freshness deterioration model.
## #    This model is to explain what "trip level frontier" means.
## #    There is no "true model" for the trip level frontier, as we specify that it is generated from the daily level behavior. Hence, the trip level frontier is something aggegated prodiction of daily level catch IF the exogenous effort assumption is true, (which is not).  We run this model and estimate with the trip level data to get "true" parameter of trip level production.
## # 2. Homegenous, freshness deterioration
## #    This is true model we estimated in the main section. Hence, we assume that this is the true DGP, and see what is going to happen if we run trip-level fronrier analysis to the data generated by this model.
## #    There may be two points: the estimated coefficient on the effort (day at sea) is "biased", relative to "true" effort coefficient in model 1. Also, evaluation using the estimated frontier may be correlated with the freshness-loss. Why?
## # 3. Heterogeneous, freshness deterioration
## #    This is based on the reviewer's suggestion. Even though there exists freshness loss aversion and associated bias, there still may be inefficiencies. Heterogeity is in the ability to catch, so we model it by differing the expected daily amount of catch by vessels, but daily stochasticity remains.
## 
## sim_trips = 500


## ----library -----

pacman::p_load(tidyverse,
               lubridate, 
               ggthemes,
               MASS, 
               stargazer,    # generate regression table
               foreign,      # for Stata data
               glmnet,       # Lasso estimation
               psych,        # for summary statistics
               knitr,        # kable
               kableExtra,   # fancy table
               Benchmarking, # DEA analysis
               frontier,     # SFA analysis
               estimatr,     # Robust Estimator
               patchwork,    # easy layout of ggplots
               extrafont,    # ggplot fonts
               huxtable,     # use huxtable
               recipes)      # to use discretize 

## ---- Define functions ------------

'%notin%' <- Negate('%in%')


## ----simulation data -------------------------------------------

data_trip_sec1 = read_csv("data_trip_sec1_2021-03-03.csv")
data_trip_sec2 = read_csv("data_trip_sec2_2021-03-03.csv")
data_trip_sec2_hetero = read_csv("data_trip_sec2_hetero_2021-03-03.csv")


## ----sim DEA estimation ----------------------------------------
# DEA for endogenous trip
x2 <- as.matrix(data_trip_sec2$day)
y2 <- as.matrix(data_trip_sec2$cum_catch)

x1 <- as.matrix(data_trip_sec1$day)
y1 <- as.matrix(data_trip_sec1$cum_catch)
yy1 <- as.matrix(data_trip_sec1$add_catch)
y1_hetero <- as.matrix(data_trip_sec1$cum_catch_hetero)

x2_hetero <- as.matrix(data_trip_sec2_hetero$day)
y2_hetero  <- as.matrix(data_trip_sec2_hetero$cum_catch_hetero)
yy2_hetero  <- as.matrix(data_trip_sec2_hetero$add_catch_hetero)


dea_sec1_homo = Benchmarking::dea(x1,y1,RTS="vrs")
##  Benchmarking::dea.plot.frontier(x1,y1,RTS="vrs")

dea_sec2_homo = Benchmarking::dea(x2,y2,RTS="vrs")
##  Benchmarking::dea.plot.frontier(x2,y2,RTS="vrs")

dea_sec1_hetero = Benchmarking::dea(x1,y1_hetero,RTS="vrs")
##Benchmarking::dea.plot.frontier(x1,y1_hetero,RTS="vrs")

dea_sec2_hetero = Benchmarking::dea(x2_hetero,y2_hetero,RTS="vrs")
##  Benchmarking::dea.plot.frontier(x2_hetero,y2_hetero,RTS="vrs")


## ----simulation SFA estimation ----------------------------------------

# Scenario 1: Homogeneous, planned stopping
sfa_sec1_homo = frontier::sfa(cum_catch ~ day,data_trip_sec1)
##summary(sfa_sec1_homo)
sfa_sec1_homo_log = frontier::sfa(log(cum_catch) ~ log(day),data_trip_sec1)
ols_sec1_homo_log = lm(log(cum_catch) ~ log(day),data_trip_sec1)
##summary(sfa_sec1_homo_log)
sfa_sec1_homo_fe = frontier::sfa(cum_catch ~ day + factor(ves),data_trip_sec1)
##summary(sfa_sec1_homo_fe)
sfa_sec1_homo_log_fe = frontier::sfa(log(cum_catch) ~ log(day) + factor(ves),data_trip_sec1)
sum_sfa_sec1_homo_log_fe = summary(sfa_sec1_homo_log_fe)

# Scenario 2: Homogeneous, endogeneous stopping
sfa_sec2_homo = frontier::sfa(cum_catch ~ day,data_trip_sec2)
##summary(sfa_sec2_homo)
sfa_sec2_homo_log = frontier::sfa(log(cum_catch) ~ log(day),data_trip_sec2)
##summary(sfa_sec2_homo_log)
sfa_sec2_homo_fe = frontier::sfa(cum_catch ~ day + factor(ves),data_trip_sec2)
sum_sfa_sec2_homo_fe = summary(sfa_sec2_homo_fe)
sfa_sec2_homo_log_fe = frontier::sfa(log(cum_catch) ~ log(day) + factor(ves),data_trip_sec2)
sum_sfa_sec2_homo_log_fe = summary(sfa_sec2_homo_log_fe)



# Scenario 3:  Heterogeneous, planned stopping
sfa_sec1_hetero = frontier::sfa(cum_catch_hetero ~ day,data_trip_sec1)
##summary(sfa_sec1_hetero)
sfa_sec1_hetero_fe = frontier::sfa(cum_catch_hetero ~ day + factor(ves),data_trip_sec1)
##summary(sfa_sec1_hetero_fe)
sfa_sec1_hetero_log = frontier::sfa(log(cum_catch_hetero) ~ log(day),data_trip_sec1)
##summary(sfa_sec1_hetero_log)
sfa_sec1_hetero_log_fe = frontier::sfa(log(cum_catch_hetero) ~ log(day) + factor(ves),data_trip_sec1)
##summary(sfa_sec1_hetero_log_fe)
sfa_sec1_hetero_log_slope = frontier::sfa(log(cum_catch_hetero) ~ log(day) + log(day):factor(ves%%2) + factor(ves%%2),data_trip_sec1)
##summary(sfa_sec1_hetero_log_slope)

# Scenario 4:  Heterogeneous, endogeneous stopping
sfa_sec2_hetero = frontier::sfa(cum_catch_hetero ~ day,data_trip_sec2_hetero)
##summary(sfa_sec2_hetero)
sfa_sec2_hetero_fe = frontier::sfa(cum_catch_hetero ~ day + factor(ves),data_trip_sec2_hetero)
##summary(sfa_sec2_hetero_fe)

sfa_sec2_hetero_log = frontier::sfa(log(cum_catch_hetero) ~ log(day),data_trip_sec2_hetero)
#summary(sfa_sec2_hetero_log)
sfa_sec2_hetero_log_fe = frontier::sfa(log(cum_catch_hetero) ~ log(day) + factor(ves),data_trip_sec2_hetero)
##summary(sfa_sec2_hetero_log_fe)
sfa_sec2_hetero_log_slope = frontier::sfa(log(cum_catch_hetero) ~ log(day) + log(day):factor(ves%%2) + factor(ves%%2),data_trip_sec2)
##summary(sfa_sec2_hetero_log_slope)


## ----sim_table, eval = TRUE--------------------------------------------
# --------- Main table for estimated coefficients on "days"----------
sim_tab <- bind_rows(
  # raw specification
  summary(sfa_sec1_homo)$mleParam["day",c("Estimate","Std. Error")],
  summary(sfa_sec2_homo)$mleParam["day",c("Estimate","Std. Error")],
  summary(sfa_sec1_hetero)$mleParam["day",c("Estimate","Std. Error")],
  summary(sfa_sec2_hetero)$mleParam["day",c("Estimate","Std. Error")],
  # log-log specification
  summary(sfa_sec1_homo_log)$mleParam["log(day)",c("Estimate","Std. Error")],
  summary(sfa_sec2_homo_log)$mleParam["log(day)",c("Estimate","Std. Error")],
  summary(sfa_sec1_hetero_log)$mleParam["log(day)",c("Estimate","Std. Error")],
  summary(sfa_sec2_hetero_log)$mleParam["log(day)",c("Estimate","Std. Error")],
  # fixed effects, raw
  summary(sfa_sec1_homo_fe)$mleParam["day",c("Estimate","Std. Error")],
  summary(sfa_sec2_homo_fe)$mleParam["day",c("Estimate","Std. Error")],
  summary(sfa_sec1_hetero_fe)$mleParam["day",c("Estimate","Std. Error")],
  summary(sfa_sec2_hetero_fe)$mleParam["day",c("Estimate","Std. Error")],
  # fixed effects, log-log
  summary(sfa_sec1_homo_log_fe)$mleParam["log(day)",c("Estimate","Std. Error")],
  summary(sfa_sec2_homo_log_fe)$mleParam["log(day)",c("Estimate","Std. Error")],
  summary(sfa_sec1_hetero_log_fe)$mleParam["log(day)",c("Estimate","Std. Error")],
  summary(sfa_sec2_hetero_log_fe)$mleParam["log(day)",c("Estimate","Std. Error")]
) %>%
  round(.,digits = 3) %>%
  bind_cols(c(rep("Linear",4),rep("Log-log",4),rep("Linear,FE",4),rep("Log-log,FE",4)),
            c(rep(c("Homo","Homo","Hetero","Hetero"),4)),
            c(rep(c("Sec.1","Sec.2"),8))) %>%
  tibble() %>%
  pivot_longer(cols = c(`Estimate`,`Std. Error`), 
               names_to = "var", 
               values_to = "values") %>%
  pivot_wider(id_cols = c(`...3`,var), 
              values_from = values, 
              names_from = c(`...4`,`...5`)) %>%
  mutate(across(.cols = 3:6, .fns = ~ifelse(var == "Std. Error", 
                                            paste0("(",format(.,digits = 3),")"),
                                            format(.,digits = 3)))) %>%
  mutate(`...3` = ifelse(var == "Estimate",`...3`,"")) %>%
  dplyr::select(-2) # remove "var"



## ----sim_SFA_table, include = TRUE, results= 'asis', eval = TRUE-------
knitr::kable(sim_tab,
             align = "c",
             booktabs = TRUE,
             col.names = c("Specification", "Planned","Endogeneous","Planned","Endogeneous"),
             caption = "\\label{tab:sim}The estimated coefficients on trip days in stochastic frontier estimation"
) %>% 
  kable_paper() %>%
  add_header_above(c("","Homogeneous" = 2,"Heterogeneous" = 2)) %>%
  footnote(general = "Standard errors are shown in the parentheses")


## ----scatter_homo_draw, eval = TRUE------------------------------------
# ----- plots: scatter plot + estimated frontier ---------------

# --- Homogeneous data ----

fun.sec1 = function(x) sfa_sec1_homo$mleParam["(Intercept)"] + sfa_sec1_homo$mleParam["day"]*x
fun.sec1_log = function(x) exp(sfa_sec1_homo_log$mleParam["(Intercept)"] + sfa_sec1_homo_log$mleParam["log(day)"]*log(x))


plot_trip_sec1 = ggplot(data_trip_sec1,aes(x = day, y = cum_catch, col =fresh_loss)) +
  geom_jitter(size=0.5, alpha = 0.6) + 
  stat_function(aes(linetype = "Linear"), geom = "line", col = "red",fun = fun.sec1) + 
  stat_function(aes(linetype = "Log"), geom = "line", col = "red",fun = fun.sec1_log) +
  scale_color_viridis_c(limits =c(0,0.6)) + 
  ylim(8,43) + 
  xlim(20,50) + 
  labs(x = "Day", y = "Catch per trip", col = "Fresh. loss",linetype = "Spec.",
       title = "Planned Stopping scenario") +
  theme_bw()


fun.sec2 = function(x) sfa_sec2_homo$mleParam["(Intercept)"] + sfa_sec2_homo$mleParam["day"]*x
fun.sec2_log = function(x) exp(sfa_sec2_homo_log$mleParam["(Intercept)"] + sfa_sec2_homo_log$mleParam["log(day)"]*log(x))


plot_trip_sec2 = ggplot(data_trip_sec2,aes(x = day, y = cum_catch, col =fresh_loss)) +
  geom_jitter(size=0.5, alpha = 0.6) + 
  stat_function(aes(linetype = "Linear"), geom = "line",col = "red",fun = fun.sec2) +
  stat_function(aes(linetype = "Log"), geom = "line",col = "red",fun = fun.sec2_log) +
  scale_color_viridis_c(limits =c(0,0.6)) + 
  ylim(8,43) + 
  xlim(20,50) + 
  labs(x = "Day", y = "Catch per trip",  col = "Fresh. loss",linetype = "Spec.",
       title = "Endogenous Stopping Scenario") +
  theme_bw()


## ----plot_sim_scatter, eval = TRUE, fig.cap="\\label{fig:sim_homo_scatter}Scatter plot of trip level catch in days and estimated frontiers." ,fig.width = 10,fig.height=6----
# ****** plot used in the paper *******
plot_trip_sec1 + plot_trip_sec2 + plot_layout(guides = "collect") & theme(legend.position = "bottom") 


## ----plot_sim_scatter2, eval = FALSE-----------------------------------
## # ****** plot for pub *******
## cairo_ps("figures/pub/sim_homo_scatter.eps",width = 10, height = 6)
##   plot_trip_sec1 + plot_trip_sec2 + plot_annotation(tag_levels = 'A') + plot_layout(guides = "collect") & theme(legend.position = "bottom",
##       text = element_text(family = "Times New Roman"))
## dev.off()


## ----eff_calc, eval = TRUE---------------------------------------------

# ------------ efficiencies of SFA --------------------------------
# evaluate the efficiency of each observation by the estimates 

eff_sfa_sec1 = frontier::efficiencies(sfa_sec1_homo_log)
eff_sfa_sec2 = frontier::efficiencies(sfa_sec2_homo_log)


# this efficiency calculation function is made by me,
# but basically a part of the efficiencies.frontier function
# I want to estimate the efficiency of the data under scenario 2 using the frontier 
# estimates of scenario 1.
eff_original <- function(object,object2,logDepVar = TRUE, minusU = farrell, 
                         farrell = TRUE, margEff = FALSE){
  fitted <- drop(object2$dataTable[, 4:(3 + 2), drop = FALSE] %*% 
                   object$mleParam[1:2])
  resid <- object2$dataTable[,3] - fitted
  
  sigmaSq <- coef(object)["sigmaSq"]
  gamma <- coef(object)["gamma"]
  lambda <- sqrt(gamma/(1 - gamma))
  
  if (minusU) {
    dir <- 1
  }
  else {
    dir <- -1
  }
  if (object$ineffDecrease) {
    tau <- 1
  }
  else {
    tau <- -1
  }
  
  if (margEff) {
    warning("cannot calculate marginal effects of z variables", 
            " for an error components frontier,", " because this model does not have z variables")
    margEff <- FALSE
  }
  if (object$timeEffect) {
    eta <- coef(object)["time"]
    etaStar <- exp(-eta * (1:object$nt - object$nt))
  }
  else {
    eta <- 0
    etaStar <- rep(1, object$nt)
  }
  if (object$nt == 1) {
    residStar <- drop(resid)
    tStar <- 1
  }
  else {
    residStar <- rep(NA, object$nn)
    tStar <- rep(NA, object$nn)
    for (i in 1:object$nn) {
      residStar[i] <- sum(resid[i, ] * etaStar, na.rm = TRUE)
      tStar[i] <- sum(etaStar[!is.na(resid[i, ])]^2)
    }
  }
  if (object$truncNorm) {
    mu <- coef(object)["mu"]
  }
  else {
    mu <- 0
  }
  
  muStar <- (-tau * gamma * residStar + mu * (1 - gamma))/(1 + (tStar - 1) * gamma)
  sigmaStarSq <- sigmaSq * gamma * (1 - gamma)/(1 + (tStar - 1) * gamma)
  sigmaStar <- sqrt(sigmaStarSq)
  result <- matrix(NA, nrow = object$nn, ncol = object$nt^object$timeEffect)
  for (j in 1:ncol(result)) {
    result[, j] <- exp(pnorm(-dir * sigmaStar * etaStar[j] + 
                               muStar/sigmaStar, log.p = TRUE) - 
                         pnorm(muStar/sigmaStar, log.p = TRUE)) * 
      exp(-dir * muStar * etaStar[j] + 0.5 * sigmaStarSq * etaStar[j]^2)
  }
  result
}


# calculate efficiencies 
# efficiency of scenario 2 evaluated by frontier of scenario 1
# we want to calculate efficiency score of data scenario 2 using the estimates of scenario 1
# "use correct estimates to evaluate the actual data" can be misleading".
##temp= eff_original(sfa_sec1_homo_log,sfa_sec1_homo_log)
##temp2= efficiencies(sfa_sec1_homo_log)
##plot(temp,eff_sfa_sec2_by1)

eff_sfa_sec2_by1= eff_original(sfa_sec1_homo_log,sfa_sec2_homo_log)
eff_sfa_sec2_by1_hetero= eff_original(sfa_sec1_hetero_log_fe,sfa_sec2_hetero_log)

## summary(eff_sfa_sec2_by1)
## summary(eff_sfa_sec2_by1_hetero)


# efficiencies for hetergenious scenario (3 and 4)
eff_sfa_sec1_hetero = frontier::efficiencies(sfa_sec1_hetero_log)
eff_sfa_sec2_hetero = frontier::efficiencies(sfa_sec2_hetero_log)

## summary(eff_sfa_sec1_hetero)
## head(eff_sfa_sec1_hetero)

# efficiencies by DEA
eff_dea_sec1_homo = Benchmarking::eff(dea_sec1_homo)
eff_dea_sec2_homo = Benchmarking::eff(dea_sec2_homo)
eff_dea_sec1_hetero = Benchmarking::eff(dea_sec1_hetero)
eff_dea_sec2_hetero = Benchmarking::eff(dea_sec2_hetero)

# trip level data merged with efficiencies
data_trip_sec1_eff = data.frame(data_trip_sec1,
                                eff_sfa_sec1,
                                eff_sfa_sec1_hetero,
                                eff_dea_sec1_homo = eff_dea_sec1_homo,
                                eff_dea_sec1_hetero = eff_dea_sec1_hetero) 

data_trip_sec2_eff = data.frame(data_trip_sec2,eff_sfa_sec2_by1, 
                                eff_sfa_sec2 = eff_sfa_sec2,
                                eff_dea_sec2_homo = eff_dea_sec2_homo) %>%
  group_by(day) %>%
  mutate(rel_fresh_loss = fresh_loss/max(fresh_loss)) %>%
  ungroup()

data_trip_sec2_eff_hetero = data.frame(data_trip_sec2_hetero,
                                       eff_sfa_sec2_hetero,
                                       eff_sfa_sec2_by1_hetero,
                                       eff_dea_sec2_hetero = eff_dea_sec2_hetero
)


# comparison of "true" efficiency dist, and effiency of sec2 evaluated by sec1

# data of efficiencies evaluate by each frontier (sec1 data for sec1 frontier, sec2 data for sec2 frontier)
data_eff_dist_homo = bind_rows(data_trip_sec1_eff[,c("ves","trip","day","efficiency")] %>% 
                                 mutate(data = "Sec.1", frontier = "Sec.1"),
                               data_trip_sec2_eff[,c("ves","trip","day","efficiency")] %>% 
                                 mutate(data = "Sec.2", frontier = "Sec.2"),
                               data_trip_sec2_eff[,c("ves","trip","day","eff_sfa_sec2_by1")] %>%
                                 rename("efficiency" = "eff_sfa_sec2_by1") %>%
                                 mutate(data = "Sec.2", frontier = "Sec.1")
) %>%
  # add category for legend
  mutate(legend = ifelse(data == "Sec.1" & frontier == "Sec.1",
                         "Planned data & Planned frontier (Benchmark)",
                         ifelse(data == "Sec.2" & frontier == "Sec.2",
                                "Endo.data & Endo. frontier", 
                                ifelse(data == "Sec.2" & frontier == "Sec.1", "Endo. data & Planned frontier",NA))))

data_eff_dist_hetero = bind_rows(data_trip_sec1_eff[,c("ves","trip","day","efficiency.1")] %>% 
                                   rename("efficiency" = "efficiency.1") %>% 
                                   mutate(data = "Sec.1", frontier = "Sec.1"),
                                 data_trip_sec2_eff_hetero[,c("ves","trip","day","efficiency")] %>% 
                                   mutate(data = "Sec.2", frontier = "Sec.2"),
                                 data_trip_sec2_eff_hetero[,c("ves","trip","day","eff_sfa_sec2_by1_hetero")] %>% 
                                   rename("efficiency" = "eff_sfa_sec2_by1_hetero") %>% 
                                   mutate(data = "Sec.2", frontier = "Sec.1")
)%>%
  # add category for legend
  mutate(legend = ifelse(data == "Sec.1" & frontier == "Sec.1",
                         "Planned data & Planned frontier (Benchmark)",
                         ifelse(data == "Sec.2" & frontier == "Sec.2",
                                "Endo.data & Endo. frontier", 
                                ifelse(data == "Sec.2" & frontier == "Sec.1", "Endo. data & Planned frontier",NA))))


# Plot: distribution comparison
plot_eff_dist_homo = ggplot(data_eff_dist_homo, aes(x = efficiency, col = data, linetype = frontier)) +
  stat_density(position = "identity", geom = "line", alpha = 0.8)+ 
  ylim(0,9) + 
  labs(title = "Homogeneous technology", x = "Efficiency", y = "Density",
       col = "Data", linetype = "Estimated\nFrontier") +
  scale_linetype_discrete(labels = c("Planned","Endogeneous")) + 
  scale_color_discrete(labels = c("Planned","Endogeneous")) +
  theme_bw() + 
  theme(legend.position = "bottom")

# Plot: same as above, but publication version (for grey scale)
plot_eff_dist_homo_pub = ggplot(data_eff_dist_homo, 
                                aes(x = efficiency, linetype = legend)) +
  stat_density(position = "identity", geom = "line", alpha = 0.8)+ 
  ylim(0,9) + 
  scale_linetype_manual(values = c("solid","dotted","twodash"),
                        breaks = c("Planned data & Planned frontier (Benchmark)",
                                   "Endo.data & Endo. frontier",
                                   "Endo. data & Planned frontier")) +
  labs(title = "Homogeneous technology", x = "Efficiency", y = "Density",linetype = "") +
  theme_bw() + 
  theme(legend.position = "bottom",
        plot.title = element_text(size=16),
        text = element_text(family = "Times New Roman"))


# Plot: distribution comparison, heterogeneous case
plot_eff_dist_hetero = ggplot(data_eff_dist_hetero, aes(x = efficiency, col = data, linetype = frontier)) +
  stat_density(position = "identity", geom = "line", alpha = 0.8)+ 
  ylim(0,9) + 
  labs(title = "Heterogeneous technology", x = "Efficiency", y = "Density",
       col = "Data", linetype = "Estimated\nFrontier")+
  scale_linetype_discrete(labels = c("Planned","Endogeneous")) + 
  scale_color_discrete(labels = c("Planned","Endogeneous")) +
  theme_bw()+ 
  theme(legend.position = "bottom")


# Plot: hetero case. same as above, for publication 
plot_eff_dist_hetero_pub = ggplot(data_eff_dist_hetero, 
                                  aes(x = efficiency, linetype = legend)) +
  stat_density(position = "identity", geom = "line", alpha = 0.8)+ 
  ylim(0,9) + 
  scale_linetype_manual(values = c("solid","dotted","twodash"),
                        breaks = c("Planned data & Planned frontier (Benchmark)",
                                   "Endo.data & Endo. frontier",
                                   "Endo. data & Planned frontier")) +
  labs(title = "Homogeneous technology", x = "Efficiency", y = "Density",linetype = "") +
  theme_bw() + 
  theme(legend.position = "bottom",
        plot.title = element_text(size=16),
        text = element_text(family = "Times New Roman"))





# data to compare the distribution of efficiency by day
# using the frontier 1 for both data
data_eff_dist_by1 = data_eff_dist_homo %>% 
  filter(frontier == "Sec.1")



## ----eff_com_plot, eval = TRUE, fig.cap="\\label{fig:eff_comp}Evaluated efficiencies of trips under each scenario using the correct frontier." ,fig.width = 8,fig.height=6----
# by day, sec1frontier
# ****** plot used in the paper *******
plot_eff_comp = ggplot(data_eff_dist_by1, aes(x = day, y = efficiency, col = factor(data), 
                                              shape =factor(data), group = data)) +
  geom_point(size=1) +
  scale_color_manual(values = c("grey80","black"), label = c("Planned","Endogeneous")) + 
  scale_shape_manual(values = c(4,16), label = c("Planned","Endogeneous")) +
  labs(col = "Data", shape = "Data", x = "Trip Days",y = "Efficiency") + 
  theme_bw() + 
  theme(legend.position = "bottom",
        text = element_text(family = "Times New Roman"))

plot_eff_comp


## ----eff_comp_pub, eval = FALSE----------------------------------------
## cairo_ps("figures/pub/eff_comp.eps", width = 8, height = 6)
##   plot_eff_comp
## dev.off()
## 


## ----eff_dist_plot, eval = TRUE, fig.cap="\\label{fig:eff_dist_plot}The distribution of evaluated efficiencies under each scenario using the correct and wrong frontiers." ,fig.width = 10,fig.height=6----
# ****** plot used in the paper *******
plot_eff_dist_homo + plot_eff_dist_hetero + plot_layout(guides = 'collect') &
  theme(legend.position='bottom',
        legend.text=element_text(size=rel(1.2)))


## ----eff_dist_plot_pub, eval = TRUE------------------------------------
# ****** plot used in the paper (publication) *******
plot_eff_dist_pub = plot_eff_dist_homo_pub + plot_eff_dist_hetero_pub + plot_layout(guides = 'collect') &
  theme(legend.position='bottom',
        legend.text=element_text(size=rel(1.1)))


# EPS
cairo_ps(file = "figures/pub/eff_dist_compare.eps", 
         width = 10, height = 6)
plot_eff_dist_pub
dev.off()

# TIFF
ggsave("figures/pub/eff_dist_compare.tiff", plot=plot_eff_dist_pub,
       device = "tiff", 
       width = 10,height=6)

ggsave("figures/pub/eff_dist_compare.png", plot=plot_eff_dist_pub,
       device = "png", 
       width = 10,height=6)


## ----dist_eff_type_plot_setup, eval = TRUE-----------------------------
# Distribution plot for heterogeneity data, by good or bad vessels
# change label
# New facet label names for dose variable
data.labs <- c("Data: Planned", "Data: Endogeneous")
names(data.labs) <- c("Sec.1", "Sec.2")
frontier.labs <- c("Frontier: Planned", "Frontier: Endogeneous")
names(frontier.labs) <- c("Sec.1", "Sec.2")


## ----dist_eff_type_plot, eval = TRUE, fig.cap="\\label{fig:eff_dist_type}The distributions of evaluated efficiencies by type under each scenario and by correct and wrong frontiers for heterogeneous setting." ,fig.width = 10,fig.height=8----
# ******* PLot used in the paper ******
plot_eff_dist_hetero_by_type = ggplot(data_eff_dist_hetero, aes(x = efficiency, col = factor(ves%%2))) +
  stat_density(position = "identity", geom = "line", alpha = 0.8)+ 
  ylim(0,9) + 
  labs(x = "Efficiency", y = "Density",
       col = "Vessel Type")+
  scale_color_manual(values = c("red","blue"), labels = c("Bad","Good")) +
  theme_bw()+ 
  theme(legend.position = "bottom",
        text = element_text(family = "Times New Roman")) + 
  # facet_grid(frontier~data, labeller = label_both)
  facet_grid(frontier~data, 
             labeller = labeller(data = data.labs,frontier = frontier.labs),
             drop = FALSE)

plot_eff_dist_hetero_by_type


## ----------------------------------------------------------------------
# ******* PLot used in the paper ******
plot_eff_dist_hetero_by_type_pub = ggplot(data_eff_dist_hetero, aes(x = efficiency, linetype = factor(ves%%2))) +
  stat_density(position = "identity", geom = "line", alpha = 0.8)+ 
  ylim(0,9) + 
  labs(x = "Efficiency", y = "Density",
       linetype = "Vessel Type")+
  scale_linetype_manual(values = c("dotted","solid"), labels = c("Bad","Good")) +
  theme_bw()+ 
  theme(legend.position = "bottom",
        legend.text=element_text(size=rel(1.1)),
        text = element_text(family = "Times New Roman"),
        strip.background = element_rect(fill = "white")) + 
  # facet_grid(frontier~data, labeller = label_both)
  facet_grid(frontier~data, 
             labeller = labeller(data = data.labs,frontier = frontier.labs))


# EPS
cairo_ps(file = "figures/pub/eff_dist_type_plot.eps", 
         width = 10, height = 8)
plot_eff_dist_hetero_by_type_pub
dev.off()


# TIFF
ggsave("figures/pub/eff_dist_type_plot.tiff", plot=plot_eff_dist_hetero_by_type_pub,
       device = "tiff", 
       width = 10,height=9)

# PNG
ggsave("figures/pub/eff_dist_type_plot.png", plot=plot_eff_dist_hetero_by_type_pub,
       device = "png", 
       width = 10,height=9)